{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "c38a1dbf",
   "metadata": {},
   "outputs": [],
   "source": [
    "%run -i \"../util/file_utils.ipynb\"\n",
    "%run -i \"../util/lang_utils.ipynb\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aac16fc5",
   "metadata": {},
   "source": [
    "# Finding triplets using spaCy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "8da6bb59",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example sentences from which (subject, verb, object) triplets are extracted.\n",
    "sentences = [\n",
    "    \"The big black cat stared at the small dog.\",\n",
    "    \"Jane watched her brother in the evenings.\",\n",
    "    \"Nick was driving to Madrid.\",\n",
    "]\n",
    "\n",
    "# Token-level patterns describing a verb phrase; they are handed to a spaCy\n",
    "# Matcher in a later cell. \"OP\": \"?\" marks a token as optional.\n",
    "verb_patterns = [\n",
    "    # a lone verb\n",
    "    [{\"POS\": \"VERB\"}],\n",
    "    # a verb directly followed by an adposition\n",
    "    [{\"POS\": \"VERB\"}, {\"POS\": \"ADP\"}],\n",
    "    # optional auxiliary, then a verb, then an optional adposition\n",
    "    [\n",
    "        {\"POS\": \"AUX\", \"OP\": \"?\"},\n",
    "        {\"POS\": \"VERB\"},\n",
    "        {\"POS\": \"ADP\", \"OP\": \"?\"},\n",
    "    ],\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "e8e7bc1c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from spacy.matcher import Matcher\n",
    "# Build a rule-based matcher over the vocabulary of small_model.\n",
    "# NOTE(review): small_model is not defined in this notebook; presumably it is\n",
    "# loaded by the %run of ../util/lang_utils.ipynb in the first cell -- confirm.\n",
    "matcher = Matcher(small_model.vocab)\n",
    "# Register every pattern in verb_patterns under the single match key \"VP\".\n",
    "matcher.add(\"VP\", verb_patterns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "7775a62a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_verb_phrase(doc, matcher):\n",
    "    \"\"\"Return the longest verb-phrase match in ``doc`` together with its root token.\n",
    "\n",
    "    Args:\n",
    "        doc: The spaCy Doc to search.\n",
    "        matcher: A callable (normally a spaCy Matcher) that yields\n",
    "            ``(match_id, start, end)`` tuples for ``doc`` and has patterns\n",
    "            registered under the key \"VP\".\n",
    "\n",
    "    Returns:\n",
    "        A ``(verb_phrase, root)`` tuple. ``verb_phrase`` is the longest span\n",
    "        matched under \"VP\" (the first one reported when several share that\n",
    "        length). ``root`` is the token of that span whose dependency label is\n",
    "        \"ROOT\", or the first token of the span when no token carries that label.\n",
    "        ``(None, None)`` is returned when the matcher finds no \"VP\" match.\n",
    "    \"\"\"\n",
    "    # Resolve match ids through the vocabulary of the doc being searched rather\n",
    "    # than through the notebook-global small_model, so the function only\n",
    "    # depends on its own arguments.\n",
    "    strings = doc.vocab.strings\n",
    "    verb_phrase_spans = [\n",
    "        doc[start:end]\n",
    "        for match_id, start, end in matcher(doc)\n",
    "        if strings[match_id] == \"VP\"\n",
    "    ]\n",
    "    if not verb_phrase_spans:\n",
    "        # Nothing matched: report that explicitly instead of raising IndexError.\n",
    "        return None, None\n",
    "    # max() keeps the first of several equally long spans, which is the same\n",
    "    # element a stable descending sort by length would place first.\n",
    "    verb_phrase = max(verb_phrase_spans, key=len)\n",
    "    root = verb_phrase[0]\n",
    "    for token in verb_phrase:\n",
    "        if token.dep_ == \"ROOT\":\n",
    "            root = token\n",
    "    return verb_phrase, root"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "c59b18a6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The big black cat \t stared at \t the small dog\n",
      "Jane \t watched \t her brother\n",
      "Nick \t was driving to \t Madrid\n"
     ]
    }
   ],
   "source": [
    "# Print one tab-separated (subject, verb phrase, object) line per sentence.\n",
    "# NOTE(review): get_subject_phrase, get_object_phrase and\n",
    "# get_prepositional_phrase_objs are not defined in this notebook; presumably\n",
    "# they come from the utility notebooks %run in the first cell -- confirm.\n",
    "for sentence in sentences:\n",
    "    doc = small_model(sentence)\n",
    "    # Only the verb phrase is printed; the root token is not used below.\n",
    "    verb_phrase, root = find_verb_phrase(doc, matcher)\n",
    "    subject_phrase = get_subject_phrase(doc)\n",
    "    object_phrase = get_object_phrase(doc)\n",
    "    prep_phrases = get_prepositional_phrase_objs(doc)\n",
    "    if object_phrase is None and prep_phrases:\n",
    "        # No direct object: fall back to the first prepositional-phrase object.\n",
    "        # When there is none either, object_phrase stays None instead of the\n",
    "        # loop failing with IndexError on an empty result.\n",
    "        object_phrase = prep_phrases[0]\n",
    "    print(subject_phrase, \"\\t\", verb_phrase, \"\\t\", object_phrase)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0bca98b4",
   "metadata": {},
   "source": [
    "# Finding triplets using GPT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "50a4ab8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import openai\n",
    "# NOTE(review): OPEN_AI_KEY is not defined in this notebook; presumably it is\n",
    "# provided by one of the utility notebooks %run in the first cell -- confirm.\n",
    "openai.api_key = OPEN_AI_KEY"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "20385a53",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"choices\": [\n",
      "    {\n",
      "      \"finish_reason\": \"stop\",\n",
      "      \"index\": 0,\n",
      "      \"logprobs\": null,\n",
      "      \"text\": \"\\n\\n{\\\"subject\\\": \\\"Nick\\\", \\\"verb\\\": \\\"was driving\\\", \\\"object\\\": \\\"to Madrid\\\"}\"\n",
      "    }\n",
      "  ],\n",
      "  \"created\": 1687940898,\n",
      "  \"id\": \"cmpl-7WKp8Cv4RVOXN4GE3RBd53gRyNlOe\",\n",
      "  \"model\": \"text-davinci-003\",\n",
      "  \"object\": \"text_completion\",\n",
      "  \"usage\": {\n",
      "    \"completion_tokens\": 22,\n",
      "    \"prompt_tokens\": 48,\n",
      "    \"total_tokens\": 70\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "# The prompt carries the instruction, the requested output format and the\n",
    "# sentence to analyse.\n",
    "prompt = \"\"\"Find subject, verb, object triplets in the following sentence.\n",
    "Create a python dictionary structure of the form: {\"subject\": Subject, \"verb\": Verb, \"object\": Object}\n",
    "Sentence: Nick was driving to Madrid.\"\"\"\n",
    "\n",
    "# Request settings, gathered in one place and unpacked into the API call.\n",
    "# NOTE(review): text-davinci-003 and openai.Completion belong to the legacy\n",
    "# completions API -- confirm both are still available in the target environment.\n",
    "completion_options = {\n",
    "    \"model\": \"text-davinci-003\",\n",
    "    \"prompt\": prompt,\n",
    "    \"temperature\": 0,\n",
    "    \"max_tokens\": 256,\n",
    "    \"top_p\": 1.0,\n",
    "    \"frequency_penalty\": 0,\n",
    "    \"presence_penalty\": 0,\n",
    "}\n",
    "response = openai.Completion.create(**completion_options)\n",
    "\n",
    "# Show the full API response, not just the generated text.\n",
    "print(response)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
